In [1]:
import lasio 
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsRegressor # for KNN regression

import matplotlib.pyplot as plt # for data visualization
import plotly.express as px # for data visualization
In [2]:
# Drop the logs LTEN, MINMK, AVTX, BVTX, and ITT
import lasio

# Load the LAS file
las = lasio.read("1054310680.LAS")

# Create a dataframe from the well log data
well = las.df()
# Drop the logs LTEN, MINMK, AVTX, BVTX, and ITT
logs_to_drop = ["LTEN", "MINMK", "AVTX", "BVTX", "ITT", "SPOR", "CNPOR", "DPOR",'DCAL','SP','PE','MELCAL','RHOB','RHOC']
df = well.drop(columns=logs_to_drop, errors='ignore')

# Drop rows with missing values
df = df.dropna(how="any")

# Print the shape and check for any missing values
print(df.shape)
print(df.isnull().sum())
(3409, 10)
TBHV     0
ABHV     0
RXORT    0
RILD     0
RILM     0
RLL3     0
GR       0
DT       0
MEL15    0
MEL20    0
dtype: int64
In [3]:
df
Out[3]:
TBHV ABHV RXORT RILD RILM RLL3 GR DT MEL15 MEL20
DEPT
1400.0 591.1821 309.9621 -33.2771 5.6551 6.2686 13.2490 57.4060 72.2373 32.4823 22.6156
1400.5 590.9929 309.8554 -48.0554 5.9328 7.3321 20.2866 52.1534 68.3856 36.9438 28.6958
1401.0 590.8032 309.7481 -45.3393 6.2462 7.9173 19.9243 52.5652 67.2499 40.4439 32.9060
1401.5 590.6128 309.6403 -28.3571 6.5754 7.8046 13.5833 58.3222 68.8124 34.3923 25.4983
1402.0 590.4225 309.5325 -17.2047 6.8085 7.3175 10.5734 67.5882 72.1245 24.9305 14.5826
... ... ... ... ... ... ... ... ... ... ...
3102.0 0.7963 0.3838 -4.6366 11.6097 10.7584 13.0719 34.0328 63.0116 19.2898 14.8824
3102.5 0.6370 0.3070 -6.4359 10.5467 9.8785 12.4345 36.2984 63.6701 17.8834 13.6291
3103.0 0.4773 0.2298 -10.7346 9.5389 9.1828 12.5537 36.2811 64.0720 17.7503 13.5456
3103.5 0.3178 0.1528 -14.2869 8.6482 8.4517 12.4643 35.2736 64.2317 17.7073 13.5098
3104.0 0.1587 0.0762 -14.0149 7.9253 7.6452 11.3432 34.4259 64.2915 17.6699 13.4919

3409 rows × 10 columns

In [4]:
import pandas as pd
import matplotlib.pyplot as plt

# Calculate the Pearson correlation coefficients
correlations = df.corr()['DT']

# Remove the correlation between 'GR' and itself
correlations = correlations.drop('DT')

# Plot the correlations
plt.figure(figsize=(10, 6))
correlations.plot(kind='bar')
plt.xlabel('Logs')
plt.ylabel('Pearson Correlation Coefficient')
plt.title('Pearson Correlation Coefficients between DT and Other Logs')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [5]:
# Define brief names and units for each log
# Define brief names and units for each log
log_brief_units = {
    'TBHV': 'FT3', 
    'ABHV': 'FT3',
    'GR': 'GAPI',
    'RILD': 'Ω-m',
    'RILM': 'Ω-m',
    'RLL3': 'Ω-m',
    'CILD': 'MMHO/M',
    'CILM': 'MMHO/M ',
    'CLL3': 'MMHO/M',
    'RXORT': 'Ω-m',
    'SP': 'mV',
    'DCAL': 'inches',
    'DPOR': 'PU',
    'PE': 'unitless',
    'RHOB': 'g/cc',
    'RHOC': 'g/cc',
    'MEL15': 'Ω-m',
    'MEL20': 'Ω-m',
    'DT': 'μs/ft',
    'MELCAL': 'inches'
}
def plot_all_logs_with_brief_units_descending(data):
    """Plots multiple well logs with brief units, ensuring depth is in descending order.

    Args:
        data: A pandas DataFrame containing well log data. The index should represent depth.

    Returns:
        None
    """

    # Ensure the depth data is in descending order
    data_sorted = data.sort_index(ascending=False)

    num_logs = len(data_sorted.columns)
    fig, axes = plt.subplots(nrows=1, ncols=num_logs, figsize=(num_logs * 3, 25), sharey=True, dpi=500)

    for ax, log in zip(axes, data_sorted.columns):
        # Apply log scale for resistivity and micrologs if necessary
        if log in ['RILD', 'RILM', 'RLL3', 'MEL15', 'MEL20']:
            ax.plot(np.log10(data_sorted[log]), data_sorted.index, label=log)
        else:
            ax.plot(data_sorted[log], data_sorted.index, label=log)

        # Set the title to the log name with brief unit
        ax.set_title(f"{log} ({log_brief_units[log]})", fontsize=30)
        ax.grid()

        # Increase the font size of the x-axis ticks (log values)
        ax.tick_params(axis='x', labelsize=25)

    # Set the y-axis label and invert it for descending depth
    axes[0].set_ylabel("Depth (ft)", fontsize=35)
    axes[0].invert_yaxis()

    # Increase the font size of the y-axis ticks (depth numbers)
    axes[0].tick_params(axis='y', labelsize=25)

    plt.tight_layout()
    plt.show()

# Example usage
# Assuming you have a pandas DataFrame named 'df' containing your well log data
plot_all_logs_with_brief_units_descending(df) 
No description has been provided for this image
In [6]:
df.describe()
Out[6]:
TBHV ABHV RXORT RILD RILM RLL3 GR DT MEL15 MEL20
count 3409.000000 3409.000000 3409.000000 3409.000000 3409.000000 3409.000000 3409.000000 3409.000000 3409.000000 3409.000000
mean 291.530846 150.879273 -36.330953 6.675208 7.136873 19.769958 74.716949 79.528714 22.982292 18.621075
std 168.437966 87.268945 26.366208 7.105292 10.934110 30.025319 36.455734 13.285295 15.214074 17.093753
min 0.158700 0.076200 -118.551300 0.578800 0.613900 2.420100 17.597800 51.511000 2.270400 1.507400
25% 146.818300 76.450900 -51.670300 3.603400 3.495600 6.896500 45.722800 67.564500 14.342700 7.550700
50% 292.423600 151.771200 -23.666600 4.483000 4.455100 9.246800 77.643700 83.744300 18.099700 13.277400
75% 432.684900 221.749900 -17.792100 6.895400 6.942300 21.662700 97.618200 88.251400 25.508600 22.875300
max 591.182100 309.962100 11.621100 85.174000 181.763200 674.902200 520.418600 111.715400 85.618800 132.627800
In [7]:
# Drop the logs LTEN, MINMK, AVTX, BVTX, and ITT
logs_to_drop = ["LTEN", "MINMK", "AVTX", "BVTX", "ITT", "SPOR", "CNPOR", "DPOR",'DCAL','SP','PE','MELCAL','RHOB','RHOC']
df = well.drop(columns=logs_to_drop, errors='ignore')

# Drop rows with missing values
df = df.dropna(how="any")

# Print the shape and check for any missing values
print(df.shape)
print(df.isnull().sum())
(3409, 10)
TBHV     0
ABHV     0
RXORT    0
RILD     0
RILM     0
RLL3     0
GR       0
DT       0
MEL15    0
MEL20    0
dtype: int64
In [8]:
from sklearn.model_selection import train_test_split
train , test = train_test_split(df, test_size = 0.3)

# Use all available features for training
x_train = train.drop('DT', axis=1)  # Features excluding the target
y_train = train['DT']  # Target variable

x_test = test.drop('DT', axis=1)  # Features for testing
y_test = test['DT']  # Target variable
In [9]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

x_train_scaled = scaler.fit_transform(x_train)
x_train = pd.DataFrame(x_train_scaled)

x_test_scaled = scaler.fit_transform(x_test)
x_test = pd.DataFrame(x_test_scaled)
In [10]:
#import required packages
from sklearn import neighbors
from sklearn.metrics import mean_squared_error 
from math import sqrt
import matplotlib.pyplot as plt
%matplotlib inline
In [11]:
from sklearn import neighbors
from sklearn.metrics import mean_squared_error
from math import sqrt

rmse_val = []  # To store RMSE values for different k

for K in range(1, 20):  # k values from 1 to 20
    model = neighbors.KNeighborsRegressor(n_neighbors=K)

    model.fit(x_train, y_train)  # Fit the model
    pred = model.predict(x_test)  # Make predictions on test set
    error = sqrt(mean_squared_error(y_test, pred))  # Calculate RMSE
    rmse_val.append(error)  # Store RMSE values
    print('RMSE value for k=', K, 'is:', error)
RMSE value for k= 1 is: 3.4194896717493375
RMSE value for k= 2 is: 3.1716217478044477
RMSE value for k= 3 is: 3.1310420022274017
RMSE value for k= 4 is: 3.143750576329555
RMSE value for k= 5 is: 3.1786921564660036
RMSE value for k= 6 is: 3.2373700583496583
RMSE value for k= 7 is: 3.2975132016834134
RMSE value for k= 8 is: 3.3578073594289233
RMSE value for k= 9 is: 3.3865898653990234
RMSE value for k= 10 is: 3.4634356573953977
RMSE value for k= 11 is: 3.5628731665787274
RMSE value for k= 12 is: 3.585420293996214
RMSE value for k= 13 is: 3.638362584975556
RMSE value for k= 14 is: 3.6918202394921416
RMSE value for k= 15 is: 3.731988452535047
RMSE value for k= 16 is: 3.7884207968686083
RMSE value for k= 17 is: 3.8336088321771844
RMSE value for k= 18 is: 3.873115125781757
RMSE value for k= 19 is: 3.921092997209119
In [12]:
# import matplotlib.pyplot as plt

# Create a DataFrame for the RMSE values
curve = pd.DataFrame(rmse_val, columns=['RMSE'])
curve.index += 1  # Start index at 1 for k values

# Plotting the RMSE values
plt.figure(figsize=(10, 6))
plt.plot(curve.index, curve['RMSE'], marker='o')
plt.title('RMSE vs. Number of Neighbors (k)')
plt.xlabel('Number of Neighbors (k)')
plt.ylabel('RMSE')
plt.xticks(curve.index)  # Show all k values on x-axis
plt.grid()
plt.show()
No description has been provided for this image
In [42]:
modelR = KNeighborsRegressor(n_neighbors=3, #default=3
                             weights='uniform', #{‘uniform’, ‘distance’} or callable, default='uniform'
                             algorithm='auto', #{‘auto’, ‘ball_tree’, ‘kd_tree’, ‘brute’}, default=’auto’
                             #leaf_size=30, #default=30, Leaf size passed to BallTree or KDTree.
                             #p=2, #default=2, Power parameter for the Minkowski metric.
                             #metric='minkowski', #default=’minkowski’, with p=2 is equivalent to the standard Euclidean metric.
                             metric_params=None, #dict, default=None, Additional keyword arguments for the metric function.
                             n_jobs=-1 #default=None, The number of parallel jobs to run for neighbors search, -1 means using all processors
                            )
In [44]:
reg = modelR.fit(x_train, y_train)
In [45]:
# Predict on training data

pred_values_tr = modelR.predict(x_train)

# Predict on a test data

pred_values_te = modelR.predict(x_test)
In [48]:
# Basic info about the model
print("")
print('****************** KNN Regression ******************')    

print("")
scoreR_te = modelR.score(x_test, y_test)
print('Test Accuracy Score: ', scoreR_te)
scoreR_tr = modelR.score(x_train, y_train)
print('Training Accuracy Score: ', scoreR_tr)

print('---------------------------------------------------------')
****************** KNN Regression ******************

Test Accuracy Score:  0.9421505741956218
Training Accuracy Score:  0.9763987330543987
---------------------------------------------------------
In [50]:
# Create a copy of each dataframe before modifying
df_train_new=train.copy()
df_test_new=test.copy()

# ------- Training DataFrame -------
# Attach predicted class labels and values

df_train_new['Predicted DT']=pred_values_tr



# ------- Test DataFrame -------
# Attach predicted class labels and values

df_test_new['Predicted DT']=pred_values_te




# ------- Combined DataFrame -------
# Combine training and testing dataframes back into one
df_new=pd.concat([df_train_new, df_test_new], ignore_index=False, axis=0, sort=False)
data= df_new.sort_index(ascending=True)
data
Out[50]:
TBHV ABHV RXORT RILD RILM RLL3 GR DT MEL15 MEL20 Predicted DT
DEPT
1400.0 591.1821 309.9621 -33.2771 5.6551 6.2686 13.2490 57.4060 72.2373 32.4823 22.6156 68.259967
1400.5 590.9929 309.8554 -48.0554 5.9328 7.3321 20.2866 52.1534 68.3856 36.9438 28.6958 66.933933
1401.0 590.8032 309.7481 -45.3393 6.2462 7.9173 19.9243 52.5652 67.2499 40.4439 32.9060 66.933933
1401.5 590.6128 309.6403 -28.3571 6.5754 7.8046 13.5833 58.3222 68.8124 34.3923 25.4983 69.334233
1402.0 590.4225 309.5325 -17.2047 6.8085 7.3175 10.5734 67.5882 72.1245 24.9305 14.5826 73.154167
... ... ... ... ... ... ... ... ... ... ... ...
3102.0 0.7963 0.3838 -4.6366 11.6097 10.7584 13.0719 34.0328 63.0116 19.2898 14.8824 63.584567
3102.5 0.6370 0.3070 -6.4359 10.5467 9.8785 12.4345 36.2984 63.6701 17.8834 13.6291 63.584567
3103.0 0.4773 0.2298 -10.7346 9.5389 9.1828 12.5537 36.2811 64.0720 17.7503 13.5456 63.584567
3103.5 0.3178 0.1528 -14.2869 8.6482 8.4517 12.4643 35.2736 64.2317 17.7073 13.5098 63.584567
3104.0 0.1587 0.0762 -14.0149 7.9253 7.6452 11.3432 34.4259 64.2915 17.6699 13.4919 63.584567

3409 rows × 11 columns

In [52]:
def plotter():
    # Create a single plot
    fig, ax = plt.subplots(figsize=(6,20))
    
    # Define the logs and colors
    logs = ['DT', 'Predicted DT']
    colors = ['red', 'Green']
    
    # Plot both curves on the same axis
    for log, color in zip(logs, colors):
        ax.plot(data[log], data.index, color=color, label=log)
    
    # Set labels and invert y-axis
    ax.invert_yaxis()
    ax.set_xlabel("μs/ft")
    ax.set_ylabel("Depth (ft)")
    
    # Add a grid and legend
    ax.grid()
    ax.legend()

# Call the function to plot
plotter()
No description has been provided for this image
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: